In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime
from impala.util import as_pandas
from collections import defaultdict
from operator import itemgetter
import cPickle as pickle
%matplotlib notebook
plt.style.use('ggplot')
In [ ]:
from impala.dbapi import connect
conn = connect(host="mycluster.domain.com", port=my_impala_port_number)
cur = conn.cursor()
cur.execute("use my_db")
In [ ]:
def find_rrt_freq(reasons):
'''
reasons is a python list of RRT reasons -- 1 line for each RRT event.
Each entry is a different RRT event; there may be multiple reasons per event
output: a pandas dataframe with the counts for each reason.
'''
rrt_reasons = defaultdict(int)
for reason in reasons:
otherreason = ''
if reason.lower().startswith("other:"):
# if the line starts with "other" --> the only reason is the otherreason(s)
otherreason = reason.lower().split('other:')[1]
rrts = []
else:
# if the line contains "other:" or not
splitreason = reason.lower().split('other:')
if len(splitreason) > 1:
# if an "other" reason exists, process it differently
otherreason = splitreason[1] # text of the line after 'other:'
primaryreason = splitreason[0].strip().strip('"')
rrts = primaryreason.split(',')
for rrt in rrts:
rrt = rrt.strip()
# loop through list of rrt reasons for patient & add to count tracker
if len(rrt) > 0:
# included len check b/c splitting on "other" above caused trailing comma
if rrt not in rrt_reasons.keys():
rrt_reasons[rrt] = 1
else:
rrt_reasons[rrt] += 1
if len(otherreason) > 0:
# handle the "other" reason(s)
otherreason = "other: " + otherreason.strip().strip('"')
if otherreason not in rrt_reasons.keys():
rrt_reasons[otherreason] = 1
else:
rrt_reasons[otherreason] += 1
return pd.DataFrame(rrt_reasons, index=['count']).transpose().sort_values('count', ascending=False).reset_index()
In [ ]:
def count_others(reasons):
'''
Count how many "Other" reasons there are, both occurring alone & with other reasons.
"other_counts" is a dict which contains "only_other" & "other_withothers" as keys
'''
other_counts = defaultdict(int)
other_counts['only_other'] = 0
other_counts['other_withothers'] = 0
for reason in reasons:
if 'other' in reason.lower():
if reason.lower().startswith('other'):
other_counts['only_other'] += 1
else:
other_counts['other_withothers'] +=1
print other_counts
In [ ]:
def count_staffconcern(reasons):
'''
input: list of reasons
Counts how many time staff concern line happens, both by itself and with other reasons.
'''
staff_counts = defaultdict(int)
staff_counts['by_itself'] = 0
staff_counts['with_other_reasons'] = 0
for reason in reasons:
if 'staff concern' in reason.lower():
if 'patient,' in reason.lower():
staff_counts['with_other_reasons'] += 1
else:
staff_counts['by_itself'] += 1
print staff_counts
In [ ]:
def avg_num_reasons(reasons):
'''
input: list of reasons; each entry is an RRT reason
ouput: average number of reasons per RRT call.
'''
reasoncount = 0.0
rrtcount = 0.0
for entry in reasons:
reasoncount += len(entry.split(','))
rrtcount +=1
return reasoncount/rrtcount
In [ ]:
query = '''
SELECT ce.event_tag
FROM encounter enc
INNER JOIN clinical_event ce
ON enc.encntr_id = ce.encntr_id
WHERE enc.loc_facility_cd='633867'
AND enc.encntr_complete_dt_tm < 4e12
AND ce.event_cd='54408578'
AND ce.result_status_cd NOT IN ('31', '36')
AND ce.valid_until_dt_tm > 4e12
AND ce.event_class_cd not in ('654645')
AND enc.admit_type_cd !='0'
AND enc.encntr_type_class_cd='391';
'''
cur.execute(query)
reasons = cur.fetchall() # would read result into a list of tuples, e.g. [('Other: iv start',),(...)...]
In [ ]:
# look at the result
reasons
In [ ]:
# Make the tuple within a list a simple list
reasons = [reason[0] for reason in reasons]
In [ ]:
reasons
In [ ]:
len(reasons)
In [ ]:
count_others(reasons)
In [ ]:
count_staffconcern(reasons)
In [ ]:
avg_num_reasons(reasons)
In [ ]:
df_reasons = find_rrt_freq(reasons)
In [ ]:
df_reasons
In [ ]:
plt.figure(figsize=(12,8))
plt.tight_layout
val = df_reasons['count'][0:15]
pos = np.arange(15)+0.5 #bar centers on the y axis
plt.barh(-pos, val, align='center')
plt.yticks(-pos, df_reasons['index'][0:15])
plt.tick_params(direction='in', labelsize='16', pad=1)
plt.xlabel('Frequency of Reason', fontsize='16')
plt.title('Top Reasons for RRT Event, Jan 2015 - Aug 2016', fontsize = '16')
plt.tight_layout()
# Run the line below to save an image of the chart
plt.savefig('RRT_top15reasons.png')